We aim to use the MALDI-FISH approach to determine whether mass spectral data classify according to symbiont/host tissue locations, using both a spatial clustering framework and a discriminant analysis approach. MALDI data were peak-picked using the PeakPicking.R script and processed to reduce dimensions (Data_Processing.R). Additionally, cluster analysis was performed on the high-memory node in Cologne, and the resulting cluster data are in the associated RData files. This script depends on the following RData files: Cardinal_Processed_Data.RData, supervised-clustering-anlaysis.RData, unsupervised-clustering-anlaysis.RData
Set up working space
rm(list=ls())
library(Cardinal)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colnames,
## do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff,
## sort, table, tapply, union, unique, unsplit
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: ProtGenerics
## Welcome to Cardinal (version 1.7.0)
##
## To get started, view the introductory vignettes with
## 'browseVignettes("Cardinal")'.
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(ggplot2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:Cardinal':
##
## combine, select
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Project root; the RData inputs are read from sub-directories of this path.
dir <- "/home/maggie/Documents/Projects/maldifish/RAnalysis"
## Load the processed dataset by its full path instead of changing the working
## directory first, so the session's working directory is left untouched.
## The file is expected to provide `maldifishmz`, which is used below.
load(file.path(dir, 'Data', 'Cardinal_Processed_Data.RData'))
Add in a cross validation group for classification
## Assign every pixel to one of 10 cross-validation folds, drawn uniformly at
## random with replacement (so fold sizes are only approximately equal).
## The RNG is seeded first so the fold assignment is reproducible between runs.
## For now, just split the data 10 ways; increase to see how results change in
## an overnight run.
set.seed(42)
pixelNo <- nrow(pData(maldifishmz))
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(maldifishmz)$cvgroup <- as.vector(randNum)
First, perform PCA with the Cardinal package
## Fit a 4-component PCA on the full dataset, then report the standard
## deviation and proportion of variance of each component.
pca.mod <- PCA(
  maldifishmz,
  ncomp = 4
)
summary(pca.mod)
## PC1 PC2 PC3 PC4
## Standard deviation 8680.550952 7.057094e+03 5.026366e+03 4.667260e+03
## Proportion of Variance 0.150693 9.959791e-02 5.052505e-02 4.356346e-02
## Cumulative 0.150693 2.502909e-01 3.008159e-01 3.443794e-01
plot(summary(pca.mod))
Visualize PCA analysis
## Ten-step blue-to-red colour ramp, reused for the PC score images.
mycols <- gradient.colors(10, start = 'Blue', end = 'Red')
## Draw PC1-PC3 score images twice: first with superpose = FALSE, then with
## superpose = TRUE. TRUE/FALSE are spelled out because T and F are ordinary
## variables that can be reassigned.
image(pca.mod, column = c('PC1', 'PC2', 'PC3'), superpose = FALSE, col.regions = mycols)
image(pca.mod, column = c('PC1', 'PC2', 'PC3'), superpose = TRUE, col.regions = mycols)
We can see clear separation in the data based on principal components 1 and 2. We can also plot these points as “normal” score plots below, colored by FISH annotation.
## Set up data for plotting: PC scores per pixel, relabelled from the FISH
## annotation (Red -> mox, Green -> sox, Tissue -> host, Mixed -> sox&mox).
## drop = FALSE keeps a one-pixel class as a one-row matrix rather than a vector.
pca.scores.red <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == 'Red', , drop = FALSE])
pca.scores.red$Class <- 'mox'
pca.scores.green <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == 'Green', , drop = FALSE])
pca.scores.green$Class <- 'sox'
pca.scores.tissue <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == 'Tissue', , drop = FALSE])
pca.scores.tissue$Class <- 'host'
pca.scores.mixed <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == 'Mixed', , drop = FALSE])
pca.scores.mixed$Class <- 'sox&mox'
pca.scores <- rbind(pca.scores.green, pca.scores.mixed, pca.scores.red, pca.scores.tissue)
## Plot only every 15th row so the distribution of points stays readable.
pca.scores.reduced <- pca.scores[seq(1, nrow(pca.scores), 15), ]
ggplot(pca.scores.reduced, aes(x = PC1, y = PC2, color = Class)) + geom_point(alpha = 0.5) + ggtitle('Plotting every 15th Pixel')
## Overlay the symbiont pixels at full opacity on top of all pixels. Mixed
## pixels carry the label 'sox&mox' (assigned above), so that label is the one
## to filter on; filtering on 'mixed' matches nothing and drops them.
pca.scores.symbionts <- pca.scores[pca.scores$Class %in% c('mox', 'sox', 'sox&mox'), ]
ggplot(pca.scores, aes(x = PC1, y = PC2, color = Class)) + geom_point(alpha = 0.5) + geom_point(data = pca.scores.symbionts, aes(x = PC1, y = PC2, color = Class))
Clearly there is a large variation in the host tissue. Just to confirm, re-run PCA with just host pixels
## Re-run PCA on the host ('Tissue') pixels only.
maldifishmz.host <- maldifishmz[, maldifishmz$Class == 'Tissue']
pca.mod.2 <- PCA(maldifishmz.host, ncomp = 3)
image(pca.mod.2, column = c('PC1', 'PC2', 'PC3'), superpose = FALSE, col.regions = mycols)
## Keep the host-only scores in their own data frame and plot THOSE scores.
## Storing them in pca.scores.red and then plotting pca.scores would show the
## full-dataset PCA again instead of the host-only one.
pca.scores.host <- as.data.frame(pca.mod.2[[1]]$scores)
ggplot(pca.scores.host, aes(x = PC1, y = PC2)) + geom_point(alpha = 0.5)
To confirm the observation that ciliated-edge host pixels cluster independently of the tissue pixels in the symbiont region, we can visualize the unsupervised clustering of the complete dataset using the spatially aware k-means clustering method.
Load in clustering data
## Load the pre-computed unsupervised clustering results by full path (no
## working-directory change needed), then list the workspace to confirm which
## objects are now available.
load(file.path(dir, 'Results', 'unsupervised-clustering-anlaysis.RData'))
ls()
## [1] "df" "dir" "int_matrix"
## [4] "maldifishmz" "maldifishmz.host" "mycols"
## [7] "pca.mod" "pca.mod.2" "pca.scores"
## [10] "pca.scores.green" "pca.scores.mixed" "pca.scores.red"
## [13] "pca.scores.reduced" "pca.scores.symbionts" "pca.scores.tissue"
## [16] "peaks_in_data" "pixelNo" "randNum"
## [19] "skma" "skmg"
print('adaptive')
## [1] "adaptive"
summary(skma)
## r k method time Within-Cluster SS Between-Cluster SS Total SS
## 1 1 3 gaussian 25.299 9.197815e+12 1.495462e+12 1.069328e+13
## 2 1 5 gaussian 28.075 8.756170e+12 1.937107e+12 1.069328e+13
## 3 1 7 gaussian 32.455 8.429798e+12 2.263479e+12 1.069328e+13
## 4 2 3 gaussian 24.984 9.337381e+12 1.355896e+12 1.069328e+13
## 5 2 5 gaussian 28.335 8.954518e+12 1.738759e+12 1.069328e+13
## 6 2 7 gaussian 32.396 8.679523e+12 2.013755e+12 1.069328e+13
## Cluster maps for every (r, k) combination in skma, in a 3 x 2 grid with a key.
image(skma, key = TRUE, layout = c(3, 2))
print('gaussian')
## [1] "gaussian"
summary(skmg)
## r k method time Within-Cluster SS Between-Cluster SS Total SS
## 1 1 3 gaussian 25.850 9.197815e+12 1.495462e+12 1.069328e+13
## 2 1 5 gaussian 27.884 8.756427e+12 1.936850e+12 1.069328e+13
## 3 1 7 gaussian 31.448 8.429798e+12 2.263479e+12 1.069328e+13
## 4 2 3 gaussian 25.179 9.337381e+12 1.355896e+12 1.069328e+13
## 5 2 5 gaussian 29.174 8.954518e+12 1.738759e+12 1.069328e+13
## 6 2 7 gaussian 31.902 8.679523e+12 2.013755e+12 1.069328e+13
## Cluster maps for every (r, k) combination in skmg, in a 3 x 2 grid with a key.
image(skmg, key = TRUE, layout = c(3, 2))
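In all cases, the ciliated edge clusters independently from the other tissue portions. Additionally, it looks like the adaptive and gaussian methods perform about the same. (Note that the summary of skma above also reports method "gaussian", with sums of squares nearly identical to skmg, so it is worth confirming that the adaptive weights were actually used for skma.)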
In this analysis, a supervised method was used to classify the dataset into groups (n=4) based on our classification calls from the FISH data.
Load in data
## Switch to the Results directory and load the pre-computed supervised
## (cross-validated) classification results.
## NOTE(review): setwd() changes global state; any later relative path resolves
## against Results/ from here on.
setwd(file.path(dir,'Results'))
load('supervised-clustering-anlaysis.RData')
## List the workspace to confirm which objects are now available
## (ssca.cv and sscg.cv are used below).
ls()
## [1] "df" "dir" "int_matrix"
## [4] "maldifishmz" "maldifishmz.host" "mycols"
## [7] "pca.mod" "pca.mod.2" "pca.scores"
## [10] "pca.scores.green" "pca.scores.mixed" "pca.scores.red"
## [13] "pca.scores.reduced" "pca.scores.symbionts" "pca.scores.tissue"
## [16] "peaks_in_data" "pixelNo" "randNum"
## [19] "skma" "skmg" "ssca.cv"
## [22] "sscg.cv"
Summarize the supervised clustering analysis for both gaussian and adaptive methods
Gaussian
plot(summary(sscg.cv))
Adaptive
plot(summary(ssca.cv))
No difference between the different radii, and the highest accuracy value is at s=5; it is slightly more accurate for the adaptive method.
Plots the calls for the different cross validation groups for the adaptive method
image(ssca.cv, model=list(r=1, s=5))
These are the t-statistics for the different ions that lead to differences between groups
## Fit one spatial shrunken centroids classifier on the full dataset, with the
## Class labels as the response (r = 3, s = 5, adaptive weights).
## NOTE(review): the cross-validation image above was drawn for r = 1; confirm
## that r = 3 is the intended radius here.
ssc <- spatialShrunkenCentroids(
  maldifishmz,
  y = as.factor(maldifishmz$Class),
  r = 3,
  s = 5,
  method = 'adaptive'
)
## Plot the t-statistics of that model.
plot(
  ssc,
  mode = 'tstatistics',
  model = list(r = 3, s = 5)
)
Summarize results here
summary(ssc)
## r k s method time Predicted # of Classes
## 1 3 4 5 adaptive 59.914 3
## Mean # of Features per Class
## 1 90
## Take the 90 top-ranked features of the fitted model (90 is the mean number
## of features per class reported by summary(ssc) above) and print them in
## ascending m/z order.
sigIons <- topLabels(ssc, n = 90)
sigIons <- sigIons[order(sigIons[['mz']]), ]
sigIons
## mz r k s classes centers tstatistics p.values
## 22 459.2494 3 4 5 Tissue 207.86158 7.844322 4.662937e-15
## 65 462.3349 3 4 5 Tissue 35.77109 3.281688 1.034211e-03
## 69 474.2589 3 4 5 Red 423.92051 3.037532 2.404410e-03
## 25 478.3303 3 4 5 Tissue 880.53828 7.584024 3.552714e-14
## 47 479.3339 3 4 5 Tissue 79.85195 4.632786 3.637808e-06
## 1 480.3454 3 4 5 Tissue 1481.00227 17.993608 0.000000e+00
## 6 481.3494 3 4 5 Tissue 196.80591 12.528334 0.000000e+00
## 80 482.3246 3 4 5 Tissue 31.32135 2.495625 1.258408e-02
## 4 482.3613 3 4 5 Tissue 316.02818 13.919077 0.000000e+00
## 66 483.3637 3 4 5 Tissue 25.66951 3.200104 1.376612e-03
## 51 490.2333 3 4 5 Red 248.01484 4.525730 6.237033e-06
## 2 496.3401 3 4 5 Tissue 11295.80337 16.849117 0.000000e+00
## 3 497.3437 3 4 5 Tissue 2399.58636 15.546633 0.000000e+00
## 89 498.3454 3 4 5 Tissue 41.28434 1.870626 6.141626e-02
## 40 498.3474 3 4 5 Tissue 83.87219 5.607212 2.092010e-08
## 10 502.3281 3 4 5 Tissue 506.67680 11.060725 0.000000e+00
## 31 504.3434 3 4 5 Tissue 121.45224 6.131170 8.939853e-10
## 67 508.3415 3 4 5 Tissue 43.72303 3.199340 1.380262e-03
## 35 508.3760 3 4 5 Tissue 41.40970 5.848870 5.051189e-09
## 12 510.3565 3 4 5 Tissue 98.77363 10.707216 0.000000e+00
## 75 516.3069 3 4 5 Red 9712.45095 2.856516 4.310699e-03
## 26 518.3017 3 4 5 Tissue 252.08832 7.523249 5.639933e-14
## 36 518.3224 3 4 5 Tissue 9306.37008 5.836749 5.431669e-09
## 90 520.3291 3 4 5 Tissue 50.55427 1.821545 6.854376e-02
## 21 522.3560 3 4 5 Tissue 2588.51163 7.865679 3.996803e-15
## 19 523.3599 3 4 5 Tissue 375.58364 8.386139 0.000000e+00
## 8 524.3720 3 4 5 Tissue 138.51303 11.285756 0.000000e+00
## 43 530.3226 3 4 5 Tissue 237.92587 5.192522 2.101487e-07
## 84 532.2799 3 4 5 Red 4976.77339 2.151632 3.150064e-02
## 85 532.3012 3 4 5 Green 419.24370 2.075937 3.798435e-02
## 81 532.3757 3 4 5 Tissue 33.34248 2.300437 2.143700e-02
## 83 534.2957 3 4 5 Tissue 4615.54837 2.171540 2.990581e-02
## 42 535.2990 3 4 5 Tissue 646.29543 5.292115 1.225941e-07
## 53 536.3706 3 4 5 Tissue 51.39487 4.395117 1.114661e-05
## 41 536.3728 3 4 5 Tissue 53.92360 5.313401 1.091202e-07
## 33 536.4071 3 4 5 Tissue 61.51614 6.079468 1.234742e-09
## 58 536.4093 3 4 5 Tissue 41.14476 3.608140 3.093933e-04
## 72 540.3070 3 4 5 Red 1179.53929 2.972576 2.975046e-03
## 74 548.2752 3 4 5 Green 189.36344 2.897298 3.791042e-03
## 27 550.3869 3 4 5 Tissue 2062.86791 7.086251 1.438849e-12
## 48 551.3917 3 4 5 Tissue 239.85997 4.621702 3.837435e-06
## 30 553.4356 3 4 5 Tissue 104.96280 6.732384 1.729017e-11
## 77 556.2809 3 4 5 Red 557.62306 2.694857 7.078533e-03
## 52 558.3541 3 4 5 Tissue 44.01010 4.412965 1.026684e-05
## 82 569.4286 3 4 5 Tissue 1604.32223 2.298746 2.153292e-02
## 49 570.3541 3 4 5 Red 2705.91514 4.555423 5.422511e-06
## 29 570.4340 3 4 5 Tissue 349.63355 6.740372 1.636868e-11
## 56 577.2627 3 4 5 Red 4970.27960 3.667316 2.490824e-04
## 88 586.3269 3 4 5 Red 1020.48805 1.902127 5.724391e-02
## 45 613.4197 3 4 5 Tissue 162.54107 5.080484 3.809502e-07
## 62 625.5063 3 4 5 Tissue 40.99535 3.416768 6.353567e-04
## 11 632.3566 3 4 5 Tissue 618.11557 10.773118 0.000000e+00
## 34 633.3590 3 4 5 Tissue 93.68295 5.941529 2.885639e-09
## 71 635.4017 3 4 5 Tissue 741.02815 2.992872 2.768114e-03
## 14 643.5176 3 4 5 Tissue 370.17361 9.137486 0.000000e+00
## 73 644.5223 3 4 5 Tissue 48.44834 2.928105 3.415420e-03
## 57 658.3715 3 4 5 Tissue 168.06113 3.640514 2.729995e-04
## 9 665.4992 3 4 5 Tissue 817.87161 11.187861 0.000000e+00
## 18 666.5035 3 4 5 Tissue 181.76378 8.493339 0.000000e+00
## 13 681.4735 3 4 5 Tissue 407.66598 10.247337 0.000000e+00
## 59 682.4774 3 4 5 Tissue 63.41344 3.600614 3.184807e-04
## 37 686.4033 3 4 5 Tissue 286.27535 5.825897 5.795905e-09
## 79 687.4062 3 4 5 Tissue 47.36445 2.522705 1.165575e-02
## 68 740.5591 3 4 5 Tissue 100.51170 3.056240 2.245217e-03
## 70 742.5761 3 4 5 Tissue 46.88954 3.012100 2.598752e-03
## 15 756.5536 3 4 5 Tissue 658.87753 8.995214 0.000000e+00
## 44 756.5567 3 4 5 Tissue 218.63893 5.136546 2.833046e-07
## 87 757.5560 3 4 5 Tissue 90.36813 1.967580 4.913454e-02
## 23 757.5590 3 4 5 Tissue 193.83776 7.744197 1.021405e-14
## 28 768.3716 3 4 5 Tissue 223.01212 6.915777 4.839240e-12
## 78 768.5898 3 4 5 Tissue 77.07392 2.563905 1.035975e-02
## 60 769.3772 3 4 5 Tissue 54.07894 3.537992 4.043883e-04
## 20 778.5377 3 4 5 Tissue 658.39340 8.336311 0.000000e+00
## 24 779.5380 3 4 5 Tissue 180.56083 7.698116 1.465494e-14
## 32 784.5870 3 4 5 Tissue 750.95118 6.094671 1.123192e-09
## 38 785.5887 3 4 5 Tissue 281.44648 5.725900 1.048367e-08
## 17 794.5099 3 4 5 Tissue 391.76970 8.729114 0.000000e+00
## 64 795.5148 3 4 5 Tissue 73.00089 3.308685 9.395372e-04
## 46 801.5164 3 4 5 Tissue 62.92512 4.805494 1.558492e-06
## 54 804.5519 3 4 5 Tissue 152.71050 4.233637 2.312965e-05
## 16 829.6446 3 4 5 Tissue 292.66759 8.740340 0.000000e+00
## 55 830.6474 3 4 5 Tissue 92.92830 4.211453 2.551992e-05
## 61 857.6752 3 4 5 Tissue 339.10801 3.476290 5.098254e-04
## 86 890.5555 3 4 5 Tissue 27.37432 2.045631 4.080997e-02
## 5 892.5704 3 4 5 Tissue 512.03832 13.778715 0.000000e+00
## 7 893.5742 3 4 5 Tissue 205.71450 11.400863 0.000000e+00
## 63 918.5877 3 4 5 Tissue 70.93409 3.317225 9.113030e-04
## 50 920.6035 3 4 5 Tissue 348.76897 4.539954 5.669974e-06
## 39 921.6056 3 4 5 Tissue 219.29355 5.675392 1.409046e-08
## 76 991.6730 3 4 5 Tissue 22.75901 2.741410 6.124793e-03
## adj.p.values
## 22 1.685893e-12
## 65 8.961740e-02
## 69 1.895506e-01
## 25 1.128794e-11
## 47 4.828154e-04
## 1 0.000000e+00
## 6 0.000000e+00
## 80 8.512523e-01
## 4 0.000000e+00
## 66 1.157764e-01
## 51 7.431283e-04
## 2 0.000000e+00
## 3 0.000000e+00
## 89 1.000000e+00
## 40 3.655788e-06
## 10 0.000000e+00
## 31 1.994348e-07
## 67 1.157764e-01
## 35 9.807725e-07
## 12 0.000000e+00
## 75 3.182935e-01
## 26 1.739256e-11
## 36 1.035474e-06
## 90 1.000000e+00
## 21 1.496660e-12
## 19 0.000000e+00
## 8 0.000000e+00
## 43 3.389860e-05
## 84 1.000000e+00
## 85 1.000000e+00
## 81 1.000000e+00
## 83 1.000000e+00
## 42 2.008436e-05
## 53 1.270350e-03
## 41 1.816072e-05
## 33 2.642095e-07
## 58 3.031765e-02
## 72 2.276888e-01
## 74 2.819083e-01
## 27 4.190648e-10
## 48 5.029438e-04
## 30 4.316367e-09
## 77 4.981102e-01
## 52 1.182943e-03
## 82 1.000000e+00
## 49 6.611050e-04
## 29 4.185992e-09
## 56 2.560420e-02
## 88 1.000000e+00
## 45 5.788786e-05
## 62 5.742858e-02
## 11 0.000000e+00
## 34 5.708666e-07
## 71 2.134094e-01
## 14 0.000000e+00
## 73 2.594976e-01
## 57 2.752308e-02
## 9 0.000000e+00
## 18 0.000000e+00
## 13 0.000000e+00
## 59 3.063550e-02
## 37 1.085180e-06
## 79 7.935749e-01
## 68 1.824891e-01
## 70 2.022901e-01
## 15 0.000000e+00
## 44 4.433505e-05
## 87 1.000000e+00
## 23 3.569811e-12
## 28 1.333247e-09
## 78 7.193510e-01
## 60 3.819830e-02
## 20 0.000000e+00
## 24 4.956680e-12
## 32 2.453472e-07
## 38 1.928443e-06
## 17 0.000000e+00
## 64 8.209207e-02
## 46 2.150103e-04
## 54 2.579940e-03
## 16 0.000000e+00
## 55 2.816593e-03
## 61 4.772785e-02
## 86 1.000000e+00
## 5 0.000000e+00
## 7 0.000000e+00
## 63 8.029422e-02
## 50 6.833296e-04
## 39 2.547215e-06
## 76 4.398525e-01
Looks like we can accurately predict the mox signals from the tissue signals, but definitely can’t classify out the mixed signals (we do get some significant ions for the sox). It may be worth repeating this after taking out the mixed signal and running more CVs, and potentially adding a category for the ciliated edge tissue sections.
Next steps: Re-assign class groups by calling pixels from the ciliated edge (in the clustering analysis this corresponds to cluster group 1) & run OPLS-DA
## Cluster labels from the skma fit with r = 2, k = 7.
clust.groups <- skma$cluster$`r = 2, k = 7`
## Pixels assigned to cluster 1, which is treated here as the ciliated edge.
## NOTE(review): the cluster numbering comes from the saved fit; confirm that
## cluster 1 is still the ciliated edge if the clustering is ever re-run.
group1 <- clust.groups[which(clust.groups == 1)]
grp1 <- names(group1)
## Work on a copy so the original annotation in maldifishmz is preserved.
msidata <- maldifishmz
## Every pixel in cluster 1 is relabelled, whatever its previous Class was.
pData(msidata)[rownames(pData(msidata)) %in% grp1, 'Class'] <- 'CiliatedEdge'
## Keep only Red, Green and Tissue pixels (drops CiliatedEdge and any other
## class, e.g. Mixed) and repeat the PCA on that reduced set.
msidata.nociliatededge <- msidata[, msidata$Class %in% c('Red', 'Green', 'Tissue')]
pca.nociliated <- PCA(msidata.nociliatededge, ncomp = 3)
plot(summary(pca.nociliated))
## TRUE is spelled out because T is an ordinary, reassignable variable.
image(pca.nociliated, column = c('PC1', 'PC2'), superpose = TRUE)
Comparing ciliated edge vs. tissue
## Restrict the relabelled dataset to the two classes being compared and
## count the pixels in each.
ciliatedEdge <- msidata[
  ,
  msidata$Class %in% c('CiliatedEdge', 'Tissue')
]
table(ciliatedEdge$Class)
##
## CiliatedEdge Tissue
## 2197 14914
## Draw a class-balanced random subset: 1000 pixels from each class, sampled
## without replacement. Pixel names are sampled directly within each class, so
## the sample maps straight back onto the MSImageSet without rebuilding an
## "x-y" key (and without a variable named `subset` masking base::subset).
## sample() errors if a class has fewer than 1000 pixels.
pixel.names <- rownames(pData(ciliatedEdge))
coordsSubset <- unlist(
  lapply(
    split(pixel.names, ciliatedEdge$Class),
    sample,
    size = 1000,
    replace = FALSE
  ),
  use.names = FALSE
)
ciliatedEdge.subset <- ciliatedEdge[, pixel.names %in% coordsSubset]
ciliatedEdge.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
## Cardinal version: 1.7.0
## Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
## /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd
## Normalization: tic
## Smoothing:
## Baseline reduction:
## Spectrum representation:
## Peak picking: adaptive
##
## Slot "experimentData":
## Experiment data
## Experimenter name:
## Laboratory:
## Contact:
## Title:
## URL:
## PMIDs:
## No abstract available.
##
## Slot "imageData":
## An object of class 'MSImageData'
## iData: 3495 x 2000 matrix (53.3 Mb)
## mzData: 3495 x 2000 Hashmat (26.3 Mb)
## peakData: 3495 x 2000 Hashmat (26.3 Mb)
##
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
## pixelNames: x = 4, y = 1 x = 111, y = 1 ... x = 219, y = 233
## (2000 total)
## varLabels: x y ... cvgroup (5 total)
## varMetadata: labelType labelDescription
##
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
## featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
## (3495 total)
## varLabels: mz
## varMetadata: labelDescription
##
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
##
## Slot ".__classVersion__":
## R Biobase iSet SImageSet MSImageSet
## "3.2.4" "2.30.0" "0.1.0" "0.1.0" "0.7.0"
table(ciliatedEdge.subset$Class)
##
## CiliatedEdge Tissue
## 1000 1000
## Re-draw the cross-validation folds for the balanced subset: each pixel gets
## a fold number from 1 to 10, sampled uniformly with replacement.
## For now, just split the data 10 ways; increase to see how results change in
## an overnight run.
pixelNo <- nrow(pData(ciliatedEdge.subset))
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(ciliatedEdge.subset)$cvgroup <- as.vector(randNum)
OPLS
## Cross-validate OPLS for 1 to 12 components, using the cvgroup column as the
## fold assignment, then plot the cross-validation summary.
ciliated.opls.cv <- cvApply(
  ciliatedEdge.subset,
  .y = as.factor(ciliatedEdge.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(ciliated.opls.cv))
Re-run OPLS with the number of components chosen from the cross-validation above, and visualize
## Number of components for the final model.
comp <- 7
## Fit OPLS on the balanced subset with that many components and report the
## per-class performance summary.
ciliated.opls <- OPLS(
  x = ciliatedEdge.subset,
  y = as.factor(ciliatedEdge.subset$Class),
  ncomp = comp
)
summary(ciliated.opls)
## $`ncomp = 7`
## CiliatedEdge Tissue
## Accuracy 0.95750000 0.95750000
## Sensitivity 0.96900000 0.94600000
## Specificity 0.94600000 0.96900000
## FDR 0.05278592 0.03172979
plot(ciliated.opls)
topLabels(ciliated.opls, n=50)
## mz ncomp column coefficients loadings Oloadings
## 1 496.3401 7 CiliatedEdge 3.239119e-05 0.73613222 0.2399379697
## 2 518.3224 7 CiliatedEdge 1.103051e-05 0.24458378 0.3822193592
## 3 480.3454 7 CiliatedEdge 9.511102e-06 0.20801412 0.1314120541
## 4 497.3437 7 CiliatedEdge 9.462231e-06 0.21253774 0.1005666557
## 5 494.3251 7 CiliatedEdge 7.608535e-06 0.16592838 0.4745530852
## 6 577.2627 7 Tissue 7.487072e-06 -0.16765613 0.5086171706
## 7 534.2957 7 CiliatedEdge 6.998882e-06 0.15637335 0.1870215360
## 8 522.3560 7 CiliatedEdge 6.138194e-06 0.13635246 0.0607716237
## 9 516.3069 7 Tissue 5.563815e-06 -0.13185490 0.3278247546
## 10 546.4886 7 Tissue 4.536568e-06 -0.10211872 -0.4405931083
## 11 550.3869 7 CiliatedEdge 4.519819e-06 0.10160582 0.0372827860
## 12 569.4286 7 CiliatedEdge 4.053478e-06 0.09421348 0.0295034711
## 13 570.3541 7 Tissue 4.010556e-06 -0.08889690 -0.1134202202
## 14 770.5106 7 Tissue 3.569562e-06 -0.07967657 -0.1068790622
## 15 665.4992 7 CiliatedEdge 3.309602e-06 0.06879067 0.0726091983
## 16 754.5348 7 Tissue 3.246675e-06 -0.06844027 -0.1855584296
## 17 823.6794 7 Tissue 3.228638e-06 -0.07525794 -0.1346922135
## 18 632.3566 7 CiliatedEdge 3.175474e-06 0.06784323 -0.0310399584
## 19 495.3286 7 CiliatedEdge 3.129669e-06 0.06734487 0.0514867401
## 20 756.5536 7 CiliatedEdge 3.128505e-06 0.06822424 0.0636790508
## 21 502.3281 7 CiliatedEdge 3.018987e-06 0.06340708 0.0384983302
## 22 482.3613 7 CiliatedEdge 3.007241e-06 0.06625163 -0.0088338343
## 23 643.5176 7 CiliatedEdge 2.992980e-06 0.06479979 0.0276743218
## 24 569.4309 7 CiliatedEdge 2.916489e-06 0.05526297 0.2775774135
## 25 532.2799 7 Tissue 2.878674e-06 -0.06474777 0.1836766007
## 26 732.5543 7 Tissue 2.814010e-06 -0.06078307 0.0799069167
## 27 478.3303 7 CiliatedEdge 2.809902e-06 0.05787972 0.0066386560
## 28 754.5378 7 Tissue 2.710598e-06 -0.06700728 0.0272147063
## 29 635.4017 7 CiliatedEdge 2.530791e-06 0.05588484 0.1025697491
## 30 869.5375 7 Tissue 2.514012e-06 -0.05398143 -0.1140936948
## 31 535.2990 7 CiliatedEdge 2.493738e-06 0.05334045 0.0449067146
## 32 808.5847 7 Tissue 2.323568e-06 -0.05347421 -0.0898550524
## 33 892.5704 7 CiliatedEdge 2.318222e-06 0.04641680 -0.0349947121
## 34 805.6436 7 Tissue 2.292768e-06 -0.05312478 0.0761496745
## 35 681.4735 7 CiliatedEdge 2.257211e-06 0.04576850 0.0065494575
## 36 570.4340 7 CiliatedEdge 2.232608e-06 0.04728351 0.0553108010
## 37 519.3248 7 CiliatedEdge 2.220022e-06 0.04968112 0.0336779326
## 38 879.7426 7 Tissue 2.218522e-06 -0.05173264 -0.0752318014
## 39 548.3717 7 Tissue 2.139588e-06 -0.04445397 -0.2214521762
## 40 481.3494 7 CiliatedEdge 2.102943e-06 0.04557053 -0.0030095318
## 41 544.3375 7 CiliatedEdge 1.990968e-06 0.03776119 0.1237484724
## 42 523.3599 7 CiliatedEdge 1.968860e-06 0.04171723 0.0021593678
## 43 518.3017 7 CiliatedEdge 1.906351e-06 0.03897443 0.0005299637
## 44 839.6536 7 Tissue 1.869182e-06 -0.04179129 -0.0716935980
## 45 519.3268 7 CiliatedEdge 1.763878e-06 0.03544100 0.0221486369
## 46 586.3269 7 Tissue 1.745420e-06 -0.03726299 -0.0452867947
## 47 778.5377 7 CiliatedEdge 1.744804e-06 0.03206470 0.0341373902
## 48 721.0795 7 Tissue 1.707629e-06 -0.03026874 0.1184354413
## 49 524.3720 7 CiliatedEdge 1.621478e-06 0.03531604 -0.0123867351
## 50 595.4452 7 CiliatedEdge 1.620791e-06 0.03207545 0.0370568806
## weights Oweights
## 1 0.72298096 0.0308213359
## 2 0.24620433 -0.0068450540
## 3 0.21229064 0.0451899960
## 4 0.21119981 -0.0462485676
## 5 0.16982477 0.2724820005
## 6 -0.16711369 0.2985943475
## 7 0.15621713 -0.0001363059
## 8 0.13700633 0.0155491867
## 9 -0.12418601 0.2636653756
## 10 -0.10125756 -0.3282073752
## 11 0.10088372 0.0335348904
## 12 0.09047484 0.0209166992
## 13 -0.08951680 -0.0377595386
## 14 -0.07967369 0.1209882998
## 15 0.07387130 0.0542384786
## 16 -0.07246676 0.0075499672
## 17 -0.07206416 0.0669468311
## 18 0.07087752 0.0192262743
## 19 0.06985515 0.0506636213
## 20 0.06982916 0.0279621469
## 21 0.06738469 0.0549137055
## 22 0.06712251 0.0409688294
## 23 0.06680420 -0.0297323227
## 24 0.06509690 0.0952966239
## 25 -0.06425286 0.1864612595
## 26 -0.06280954 0.1565439139
## 27 0.06271785 0.0936287283
## 28 -0.06050136 0.0087573820
## 29 0.05648801 0.0293923567
## 30 -0.05611349 -0.0474400014
## 31 0.05566098 0.0255715041
## 32 -0.05186272 -0.0275504364
## 33 0.05174340 0.0274346264
## 34 -0.05117526 0.0674049890
## 35 0.05038163 0.0144389895
## 36 0.04983247 0.0476811468
## 37 0.04955156 0.0577411517
## 38 -0.04951808 0.0491521881
## 39 -0.04775625 -0.0804786883
## 40 0.04693830 0.0142450423
## 41 0.04443900 0.1479899594
## 42 0.04394553 -0.0067532256
## 43 0.04255033 -0.0243703786
## 44 -0.04172070 0.0463306299
## 45 0.03937029 0.0072351396
## 46 -0.03895830 -0.0304521941
## 47 0.03894454 0.0446536620
## 48 -0.03811480 0.0973690559
## 49 0.03619188 -0.0118912863
## 50 0.03617655 0.0423539955
## Image of the fitted OPLS model at the chosen number of components.
image(ciliated.opls, model = list(ncomp = comp))
## Look the result up with the same `comp` that was used to fit the model,
## rather than a hard-coded `ncomp = 7`, so the two cannot drift apart.
scores <- ciliated.opls@resultData[[paste('ncomp =', comp)]]
## Combine the orthogonal scores (prefixed t0.), the predictive scores and the
## class labels into one data frame for plotting.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
## NOTE(review): the y axis is labelled t[0] but plots column t0.C4, which
## presumably is the 4th orthogonal component; it only exists when comp >= 4.
ggplot(opls.scores, aes(x = C1, y = t0.C4, color = Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')
Conclusions: There are clear differences in the metabolome between the tissue pixels and the ciliated edge pixels. This is intriguing because it suggests that either the host is somehow controlling the symbiont assemblages at the ciliated edge of the tissue through secretion of interesting metabolites (check annotations), or the symbionts are somehow remodeling the tissue structures inside the host tissue. This could be an interesting comparison of how host tissues differ with and without bacteria.
Either way, we will need to keep the ciliated edge pixels out of the complete analysis.
## Remove the pixels annotated as 'Mixed' and count what is left per class.
## NOTE(review): only 'Mixed' is dropped here; 'CiliatedEdge' pixels remain in
## msidata. Confirm this matches the intent stated in the text above.
msidata <- msidata[, !(msidata$Class %in% 'Mixed')]
table(msidata$Class)
##
## CiliatedEdge Green Red Tissue
## 2197 788 3022 14914
## Draw a class-balanced random subset: 700 pixels from each class, sampled
## without replacement. Pixel names are sampled directly within each class, so
## the sample maps straight back onto the MSImageSet without rebuilding an
## "x-y" key (and without a variable named `subset` masking base::subset).
## sample() errors if a class has fewer than 700 pixels.
pixel.names <- rownames(pData(msidata))
coordsSubset <- unlist(
  lapply(
    split(pixel.names, msidata$Class),
    sample,
    size = 700,
    replace = FALSE
  ),
  use.names = FALSE
)
msidata.subset <- msidata[, pixel.names %in% coordsSubset]
msidata.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
## Cardinal version: 1.7.0
## Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
## /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd
## Normalization: tic
## Smoothing:
## Baseline reduction:
## Spectrum representation:
## Peak picking: adaptive
##
## Slot "experimentData":
## Experiment data
## Experimenter name:
## Laboratory:
## Contact:
## Title:
## URL:
## PMIDs:
## No abstract available.
##
## Slot "imageData":
## An object of class 'MSImageData'
## iData: 3495 x 2800 matrix (74.7 Mb)
## mzData: 3495 x 2800 Hashmat (34.1 Mb)
## peakData: 3495 x 2800 Hashmat (34.1 Mb)
##
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
## pixelNames: x = 31, y = 1 x = 37, y = 1 ... x = 225, y = 233
## (2800 total)
## varLabels: x y ... cvgroup (5 total)
## varMetadata: labelType labelDescription
##
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
## featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
## (3495 total)
## varLabels: mz
## varMetadata: labelDescription
##
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
##
## Slot ".__classVersion__":
## R Biobase iSet SImageSet MSImageSet
## "3.2.4" "2.30.0" "0.1.0" "0.1.0" "0.7.0"
table(msidata.subset$Class)
##
## CiliatedEdge Green Red Tissue
## 700 700 700 700
## Re-draw the cross-validation folds for the balanced subset: each pixel gets
## a fold number from 1 to 10, sampled uniformly with replacement.
## For now, just split the data 10 ways; increase to see how results change in
## an overnight run.
pixelNo <- nrow(pData(msidata.subset))
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(msidata.subset)$cvgroup <- as.vector(randNum)
PLS cross-validation analysis
## Cross-validate PLS for 1 to 25 components, using the cvgroup column as the
## fold assignment, then plot the cross-validation summary.
pls.cv <- cvApply(
  msidata.subset,
  .y = as.factor(msidata.subset$Class),
  .fun = "PLS",
  ncomp = 1:25,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(pls.cv))
Visualization
## Number of components for the final model.
comp <- 20
## Fit PLS on the balanced four-class subset with that many components and
## report the per-class performance summary.
pls <- PLS(
  x = msidata.subset,
  y = as.factor(msidata.subset$Class),
  ncomp = comp
)
summary(pls)
## $`ncomp = 20`
## CiliatedEdge Green Red Tissue
## Accuracy 0.97821429 0.7857143 0.7910714 0.7892857
## Sensitivity 0.98000000 0.5728571 0.5985714 0.5371429
## Specificity 0.97761905 0.8566667 0.8552381 0.8733333
## FDR 0.06412005 0.4287749 0.4204703 0.4143302
plot(pls)
topLabels(pls, n=50)
## mz ncomp column coefficients loadings weights
## 1 870.5432 20 Tissue 4.541147e-05 -0.0627238578 1.712640e-02
## 2 471.0298 20 Tissue 4.004871e-05 -0.0235705217 4.469432e-02
## 3 577.2604 20 Tissue 3.910770e-05 -0.0018499513 1.823777e-02
## 4 532.3012 20 Green 3.825126e-05 -0.0331103456 1.695989e-02
## 5 413.2668 20 Red 3.796696e-05 0.0241464634 -6.773192e-03
## 6 476.3141 20 CiliatedEdge 3.651823e-05 -0.0037021743 1.919038e-02
## 7 621.4612 20 Green 3.558655e-05 -0.0840761925 1.538020e-02
## 8 840.6551 20 Green 3.367640e-05 -0.0218645141 -3.588757e-02
## 9 520.3395 20 Red 3.317180e-05 0.0445773823 2.195467e-02
## 10 595.4452 20 Red 3.283511e-05 0.0330867518 2.187287e-02
## 11 785.5887 20 Tissue 3.250745e-05 0.0130222003 1.006211e-02
## 12 519.4972 20 Red 3.176385e-05 -0.0117457424 -8.408236e-02
## 13 826.6996 20 Red 3.095984e-05 0.0213342186 3.904842e-02
## 14 721.0824 20 Red 3.080321e-05 -0.0206364275 -3.766001e-02
## 15 857.0956 20 Red 3.075795e-05 0.0715318939 1.416225e-02
## 16 768.3716 20 CiliatedEdge 3.001690e-05 0.0164817310 2.597013e-02
## 17 804.5519 20 Tissue 2.894711e-05 0.0004555756 1.465559e-03
## 18 518.3017 20 CiliatedEdge 2.881403e-05 0.0122909214 3.961809e-03
## 19 474.2589 20 Red 2.860703e-05 0.0538503472 -6.388303e-03
## 20 518.3245 20 Green 2.860300e-05 -0.1426045391 1.443283e-02
## 21 547.4731 20 Green 2.825669e-05 0.0527408117 1.274640e-01
## 22 784.5838 20 Red 2.811974e-05 0.0860481999 1.777998e-02
## 23 610.1845 20 Tissue 2.786738e-05 0.0130476844 6.467886e-03
## 24 716.1269 20 Red 2.747382e-05 -0.0051457776 -2.017616e-02
## 25 895.7178 20 Green 2.724556e-05 -0.0375363084 1.168511e-02
## 26 548.2752 20 Green 2.697298e-05 -0.0115935734 -3.168529e-02
## 27 684.3883 20 Red 2.617944e-05 0.0124060039 2.280228e-02
## 28 817.1032 20 Red 2.615651e-05 -0.0016019814 -4.849431e-02
## 29 758.5687 20 Tissue 2.577813e-05 0.0243922987 2.191520e-02
## 30 768.5160 20 Red 2.512371e-05 -0.0020239220 2.459641e-02
## 31 682.0926 20 Tissue 2.488668e-05 0.0109732232 1.646473e-02
## 32 558.4859 20 Tissue 2.485806e-05 -0.0333801506 -7.439765e-02
## 33 758.5718 20 Tissue 2.465786e-05 0.0227073496 1.391735e-02
## 34 611.4037 20 Red 2.456193e-05 0.0085816064 1.829237e-02
## 35 490.2333 20 Red 2.447344e-05 0.0053377393 -8.191218e-05
## 36 535.2990 20 Tissue 2.436977e-05 0.0097259210 3.155073e-02
## 37 809.5847 20 Green 2.399579e-05 -0.0338460025 -3.456956e-02
## 38 923.6206 20 Tissue 2.397596e-05 -0.0164779855 -3.602164e-03
## 39 953.1206 20 Tissue 2.383393e-05 0.0281042667 -7.450008e-04
## 40 586.0689 20 Tissue 2.362250e-05 -0.0151307557 -3.496996e-02
## 41 585.0641 20 Tissue 2.338168e-05 -0.0934221883 -3.391507e-02
## 42 494.3231 20 Green 2.335323e-05 -0.0287731605 -3.577775e-02
## 43 752.5214 20 Tissue 2.329270e-05 -0.0311219037 1.667660e-02
## 44 510.3197 20 Green 2.224570e-05 -0.0132914082 8.012489e-03
## 45 894.5863 20 Red 2.202010e-05 0.0167391101 3.496549e-02
## 46 994.1155 20 Red 2.196977e-05 0.0294718949 3.322391e-02
## 47 738.0573 20 Red 2.152803e-05 0.0259167203 1.932512e-02
## 48 880.7461 20 Green 2.141732e-05 0.0241370558 5.004728e-02
## 49 756.5536 20 CiliatedEdge 2.141574e-05 0.0652108593 1.206379e-02
## 50 607.0465 20 Tissue 2.133901e-05 -0.0193759134 2.695736e-02
## Image of the fitted PLS model at the chosen number of components.
image(pls, model = list(ncomp = comp))
## Look the result up with the same `comp` that was used to fit the model,
## rather than a hard-coded `ncomp = 20`, so the two cannot drift apart.
pls.result <- pls@resultData[[paste('ncomp =', comp)]]
scores <- pls.result$scores
classes <- pls.result$y
pls.scores <- data.frame(scores, Classes = classes)
## Pairwise score plots of the first three components, coloured by class.
ggplot(pls.scores, aes(x = C1, y = C2, color = Classes)) + geom_point()
ggplot(pls.scores, aes(x = C1, y = C3, color = Classes)) + geom_point()
ggplot(pls.scores, aes(x = C2, y = C3, color = Classes)) + geom_point()
## First component against the last fitted component; the column name is
## derived from `comp` (C20 when comp is 20) and used as the axis label.
last.comp <- paste0('C', comp)
ggplot(pls.scores, aes(x = C1, y = get(last.comp), color = Classes)) + geom_point() + ylab(last.comp)
Three pairwise comparisons:
host vs. mox, host vs. sox, sox vs. mox
table(msidata.nociliatededge$Class)
##
## Green Red Tissue
## 788 3022 14914
## Draw a class-balanced random subset: 700 pixels from each class, sampled
## without replacement. Pixel names are sampled directly within each class, so
## the sample maps straight back onto the MSImageSet without rebuilding an
## "x-y" key (and without a variable named `subset` masking base::subset).
## sample() errors if a class has fewer than 700 pixels.
pixel.names <- rownames(pData(msidata.nociliatededge))
coordsSubset <- unlist(
  lapply(
    split(pixel.names, msidata.nociliatededge$Class),
    sample,
    size = 700,
    replace = FALSE
  ),
  use.names = FALSE
)
nce.subset <- msidata.nociliatededge[, pixel.names %in% coordsSubset]
nce.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
## Cardinal version: 1.7.0
## Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
## /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd
## Normalization: tic
## Smoothing:
## Baseline reduction:
## Spectrum representation:
## Peak picking: adaptive
##
## Slot "experimentData":
## Experiment data
## Experimenter name:
## Laboratory:
## Contact:
## Title:
## URL:
## PMIDs:
## No abstract available.
##
## Slot "imageData":
## An object of class 'MSImageData'
## iData: 3495 x 2100 matrix (56 Mb)
## mzData: 3495 x 2100 Hashmat (24.2 Mb)
## peakData: 3495 x 2100 Hashmat (24.2 Mb)
##
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
## pixelNames: x = 26, y = 1 x = 31, y = 1 ... x = 225, y = 233
## (2100 total)
## varLabels: x y ... cvgroup (5 total)
## varMetadata: labelType labelDescription
##
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
## featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
## (3495 total)
## varLabels: mz
## varMetadata: labelDescription
##
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
##
## Slot ".__classVersion__":
## R Biobase iSet SImageSet MSImageSet
## "3.2.4" "2.30.0" "0.1.0" "0.1.0" "0.7.0"
# Confirm the subset is balanced across classes.
table(nce.subset$Class)
##
## Green Red Tissue
## 700 700 700
## Assign numbers for CV groups
pixelNo <- nrow(pData(nce.subset))
# For now, just split the data 10 ways; increase to see how this changes in an
# overnight run. Each pixel gets a random fold label in 1..10.
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(nce.subset)$cvgroup <- as.vector(randNum)

# Cross-validate PLS for 1 to 12 components, with `cvgroup` as the fold
# variable.
nce.pls.cv <- cvApply(
  nce.subset,
  .y = as.factor(nce.subset$Class),
  .fun = "PLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(nce.pls.cv))

# Refit on the whole balanced subset with a fixed number of components
# (presumably chosen from the CV summary plot above -- confirm).
comp <- 3
nce.pls <- PLS(x = nce.subset, y = as.factor(nce.subset$Class), ncomp = comp)
summary(nce.pls)
## $`ncomp = 3`
## Green Red Tissue
## Accuracy 0.6347619 0.65190476 0.6152381
## Sensitivity 0.6957143 0.04571429 0.6114286
## Specificity 0.6042857 0.95500000 0.6171429
## FDR 0.5321806 0.66315789 0.5560166
# Default plot of the fitted model, then its 50 top-ranked features.
plot(nce.pls)
topLabels(nce.pls, n = 50)
## mz ncomp column coefficients loadings weights
## 1 665.4992 3 Tissue 1.490006e-05 -0.075846374 -0.092410295
## 2 895.7178 3 Green 1.351666e-05 0.028883947 0.076503804
## 3 532.2820 3 Green 1.231486e-05 0.108419037 0.013681086
## 4 839.6536 3 Green 1.216437e-05 0.018969337 -0.001047237
## 5 532.3012 3 Green 1.197690e-05 0.049835419 0.065886862
## 6 494.3251 3 Green 1.137939e-05 0.202689986 -0.234872802
## 7 859.6913 3 Green 1.120205e-05 -0.011779078 0.077638725
## 8 547.4731 3 Green 1.115032e-05 0.035285717 0.048682625
## 9 868.5711 3 Green 1.084311e-05 0.108354338 0.130314280
## 10 823.6794 3 Green 1.077731e-05 -0.043241954 -0.062884416
## 11 562.4833 3 Green 1.011369e-05 0.023559675 0.020769387
## 12 621.4612 3 Green 1.006500e-05 -0.013630506 0.028189995
## 13 510.3197 3 Green 9.777445e-06 0.044244627 0.066130917
## 14 544.3375 3 Tissue 9.453162e-06 -0.162549634 -0.041087405
## 15 480.3454 3 Tissue 9.351501e-06 0.019160301 -0.031163098
## 16 577.2627 3 Tissue 9.060154e-06 -0.411738604 -0.106523988
## 17 548.2752 3 Green 8.979246e-06 0.026212739 0.057256287
## 18 880.7461 3 Green 8.945407e-06 0.003393503 0.028878754
## 19 544.3397 3 Green 8.298925e-06 0.037455948 0.006338039
## 20 567.4141 3 Green 8.232806e-06 -0.068622082 -0.159505054
## 21 572.3699 3 Tissue 8.205961e-06 -0.110845676 -0.079505483
## 22 532.2799 3 Green 8.180554e-06 -0.164321720 -0.163295203
## 23 732.5543 3 Tissue 8.085319e-06 -0.209110308 -0.145004007
## 24 586.3292 3 Green 7.639761e-06 0.017705397 0.036136556
## 25 540.3070 3 Tissue 7.579065e-06 -0.101666881 -0.119383082
## 26 896.7180 3 Green 7.503089e-06 0.012333013 0.037644458
## 27 681.4735 3 Tissue 7.502975e-06 -0.022193216 -0.043572797
## 28 533.2836 3 Green 7.307487e-06 0.001251660 0.023695513
## 29 517.3095 3 Tissue 7.231884e-06 -0.103363662 -0.102026755
## 30 840.6551 3 Green 6.912892e-06 0.004585491 0.031484881
## 31 746.5342 3 Green 6.830426e-06 0.015768128 0.053567311
## 32 569.4309 3 Tissue 6.814505e-06 -0.037894582 -0.065635829
## 33 494.3231 3 Green 6.756069e-06 0.009996435 0.061858609
## 34 502.3281 3 Tissue 6.736477e-06 -0.012214431 -0.048431213
## 35 643.5176 3 Tissue 6.669014e-06 -0.014812609 -0.041597146
## 36 784.5870 3 Tissue 6.551177e-06 -0.038602337 -0.032765018
## 37 782.5685 3 Tissue 6.248723e-06 -0.093052561 -0.044918363
## 38 492.3084 3 Green 6.244946e-06 0.023010874 0.040703186
## 39 518.4945 3 Tissue 6.244399e-06 -0.185855384 -0.117180887
## 40 824.6816 3 Green 6.153319e-06 -0.011079237 0.014865245
## 41 768.5160 3 Green 6.046858e-06 0.013121886 0.046176588
## 42 754.5348 3 Tissue 6.016647e-06 -0.253565024 -0.092915222
## 43 508.3049 3 Green 5.774939e-06 0.025235797 0.040902702
## 44 546.4886 3 Green 5.765558e-06 0.003536713 -0.140004598
## 45 824.5563 3 Green 5.644082e-06 -0.045021148 0.040413775
## 46 733.5571 3 Green 5.547772e-06 -0.022910478 0.038796784
## 47 681.0893 3 Tissue 5.452878e-06 0.332733908 0.180353324
## 48 879.7391 3 Green 5.275354e-06 -0.001024984 0.029758028
## 49 869.5722 3 Green 5.222157e-06 0.015984385 0.073051408
## 50 519.3248 3 Tissue 5.206831e-06 -0.036484134 -0.031957908
# Image the fitted model for the component count stored in `comp`.
image(nce.pls, model = list(ncomp = comp))

# Extract the scores and response labels stored under "ncomp = 3".
scores <- nce.pls@resultData[["ncomp = 3"]]$scores
classes <- nce.pls@resultData[["ncomp = 3"]]$y
pls.scores <- data.frame(scores, Classes = classes)

# Score scatter plots, coloured by class.
ggplot(pls.scores, aes(x = C1, y = C2, color = Classes)) +
  geom_point()
ggplot(pls.scores, aes(x = C1, y = C3, color = Classes)) +
  geom_point()
No predictive power for red, green, and tissue signals with OPLS
# Keep only the 'Red' and 'Tissue' pixels for this pairwise comparison.
hostmox <- msidata.nociliatededge[
  ,
  msidata.nociliatededge$Class %in% c("Red", "Tissue")
]
table(hostmox$Class)
##
## Red Tissue
## 3022 14914
# Build a class-balanced subset: 700 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(hostmox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(hostmox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
hostmox.subset <- hostmox[, rownames(pData(hostmox)) %in% coordsSubset]
table(hostmox.subset$Class)
##
## Red Tissue
## 700 700
## Assign numbers for CV groups
pixelNo <- nrow(pData(hostmox.subset))
# For now, just split the data 10 ways; increase to see how this changes in an
# overnight run. Each pixel gets a random fold label in 1..10.
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(hostmox.subset)$cvgroup <- as.vector(randNum)
Perform OPLS cross-validation to select the best model representation
# Cross-validate OPLS for 1 to 12 components, with `cvgroup` as the fold
# variable, and plot the cross-validation summary.
hostmox.opls.cv <- cvApply(
  hostmox.subset,
  .y = as.factor(hostmox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(hostmox.opls.cv))
Re-run opls on all the data & visualize
# Fit OPLS on the balanced Red/Tissue subset with a fixed component count and
# print its summary.
comp <- 3
hostmox.opls <- OPLS(
  x = hostmox.subset,
  y = as.factor(hostmox.subset$Class),
  ncomp = comp
)
summary(hostmox.opls)
## $`ncomp = 3`
## Red Tissue
## Accuracy 0.6278571 0.6278571
## Sensitivity 0.6414286 0.6142857
## Specificity 0.6142857 0.6414286
## FDR 0.3755216 0.3685756
# Default plot of the fitted model, then its 50 top-ranked features.
plot(hostmox.opls)
topLabels(hostmox.opls, n = 50)
## mz ncomp column coefficients loadings Oloadings
## 1 496.3401 3 Tissue 2.135694e-05 -0.804409926 0.041056184
## 2 516.3069 3 Red 1.665970e-05 0.272343094 0.209613463
## 3 497.3437 3 Tissue 1.399554e-05 -0.237158172 -0.001495411
## 4 518.4945 3 Red 1.190377e-05 0.130659058 0.508431213
## 5 823.6794 3 Red 1.155629e-05 0.205469681 0.013551730
## 6 548.3717 3 Red 1.065344e-05 0.099440642 0.140198976
## 7 567.4141 3 Red 1.050522e-05 0.167943850 0.083990148
## 8 681.0893 3 Tissue 9.794991e-06 -0.017585245 0.055703936
## 9 577.2627 3 Red 9.301900e-06 0.278233672 -0.157105921
## 10 532.2799 3 Red 9.000590e-06 0.148727969 0.007948307
## 11 778.5377 3 Tissue 8.836493e-06 -0.092017096 -0.028575930
## 12 544.3375 3 Tissue 8.608449e-06 -0.120949547 -0.092415745
## 13 922.6163 3 Tissue 8.581353e-06 -0.121042201 -0.042511603
## 14 570.3541 3 Red 8.571665e-06 0.148699445 -0.038090837
## 15 754.5378 3 Tissue 8.543638e-06 -0.187619835 -0.239867019
## 16 569.4286 3 Tissue 8.518809e-06 -0.152887015 -0.044520803
## 17 755.5405 3 Tissue 8.362629e-06 -0.109989495 -0.106059005
## 18 879.7426 3 Red 8.028165e-06 0.138673667 0.005336956
## 19 756.5536 3 Tissue 8.006730e-06 -0.114297076 -0.006131633
## 20 892.5704 3 Tissue 7.972949e-06 -0.082652393 -0.007862662
## 21 586.3269 3 Red 7.608917e-06 0.079264121 -0.019305390
## 22 540.3070 3 Red 7.452424e-06 0.089767913 0.004462370
## 23 621.4612 3 Red 7.359374e-06 0.072319916 -0.019063341
## 24 544.3397 3 Red 6.786283e-06 0.089835063 0.049933289
## 25 534.2957 3 Red 6.526617e-06 0.001407485 -0.141480730
## 26 824.5563 3 Tissue 6.467815e-06 -0.072005950 -0.068098439
## 27 632.3566 3 Tissue 6.274317e-06 -0.067904116 -0.010896090
## 28 780.5520 3 Tissue 6.246629e-06 -0.072056395 -0.077086498
## 29 868.5711 3 Tissue 6.235068e-06 -0.154365552 -0.089590609
## 30 474.2589 3 Red 5.968259e-06 0.041077446 0.025028064
## 31 825.6950 3 Red 5.904775e-06 0.056054692 -0.006053856
## 32 784.5870 3 Tissue 5.839103e-06 -0.098044902 0.003731799
## 33 806.5690 3 Tissue 5.752278e-06 -0.084138758 -0.065156686
## 34 808.5815 3 Tissue 5.585980e-06 -0.060154259 -0.008715069
## 35 478.3303 3 Tissue 5.469181e-06 -0.056128942 -0.039408418
## 36 520.3395 3 Red 5.002724e-06 0.010431996 0.015247002
## 37 829.6446 3 Tissue 4.955506e-06 -0.052361037 -0.015886919
## 38 532.2820 3 Red 4.907254e-06 0.089437591 0.042307759
## 39 806.6464 3 Tissue 4.845583e-06 -0.036900960 -0.045802780
## 40 770.5106 3 Red 4.809199e-06 0.086386406 -0.147092032
## 41 532.3012 3 Red 4.624105e-06 0.029986256 -0.012574452
## 42 869.5375 3 Red 4.545042e-06 0.097677091 -0.066348250
## 43 480.3454 3 Tissue 4.537772e-06 -0.135720267 0.009375241
## 44 895.7178 3 Red 4.526636e-06 0.047807858 -0.009315045
## 45 817.1032 3 Tissue 4.525837e-06 0.022910658 0.008408269
## 46 578.2657 3 Red 4.419961e-06 0.074741798 -0.030064171
## 47 542.3228 3 Red 4.383377e-06 0.031406106 -0.004090966
## 48 857.6752 3 Tissue 4.343194e-06 -0.042124457 -0.024858400
## 49 585.0664 3 Red 4.230839e-06 0.020223648 -0.003923420
## 50 569.4309 3 Red 4.081732e-06 0.040126145 0.001309379
## weights Oweights
## 1 -0.32230454 0.112486004
## 2 0.25141695 0.133322521
## 3 -0.21121128 0.066346772
## 4 0.17964369 0.363171497
## 5 0.17439985 0.052882160
## 6 0.16077456 0.178727880
## 7 0.15853768 0.049457430
## 8 -0.14781944 0.117258391
## 9 0.14037803 -0.257564255
## 10 0.13583087 0.043382981
## 11 -0.13335443 0.004201804
## 12 -0.12991295 -0.086750916
## 13 -0.12950402 -0.028164107
## 14 0.12935782 0.003222971
## 15 -0.12893486 -0.170723880
## 16 -0.12856016 -0.081405604
## 17 -0.12620319 -0.112708570
## 18 0.12115568 0.027540057
## 19 -0.12083220 0.019576297
## 20 -0.12032239 0.035478488
## 21 0.11482868 -0.051915672
## 22 0.11246698 -0.024628269
## 23 0.11106273 -0.056928478
## 24 0.10241404 0.045966926
## 25 0.09849533 -0.028980189
## 26 -0.09760792 -0.050150336
## 27 -0.09468779 0.030037229
## 28 -0.09426994 -0.053335500
## 29 -0.09409546 -0.068515515
## 30 0.09006896 -0.024880097
## 31 0.08911090 -0.041114925
## 32 -0.08811982 0.020434614
## 33 -0.08680952 -0.066528271
## 34 -0.08429987 -0.019220257
## 35 -0.08253721 -0.013881399
## 36 0.07549776 -0.048894814
## 37 -0.07478517 0.004574602
## 38 0.07405699 0.063186697
## 39 -0.07312629 -0.008582946
## 40 0.07257721 -0.111602717
## 41 0.06978389 -0.050217255
## 42 0.06859073 -0.053247432
## 43 -0.06848100 -0.019517521
## 44 0.06831296 -0.020704497
## 45 -0.06830089 0.042756103
## 46 0.06670308 -0.082120299
## 47 0.06615098 -0.049364161
## 48 -0.06554457 -0.002730931
## 49 0.06384899 -0.049786153
## 50 0.06159877 0.034411144
# Image the fitted model for the component count stored in `comp`.
image(hostmox.opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 3" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- hostmox.opls@resultData[["ncomp = 3"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)

# Scatter of column C1 against column t0.C3, coloured by class
# (presumably predictive score vs. third orthogonal score -- confirm with
# names(opls.scores)).
ggplot(opls.scores, aes(x = C1, y = t0.C3, color = Class)) +
  geom_point() +
  xlab("t[1]") +
  ylab("t[0]")
Still not a significant model (FDR > 0.05)
# Keep only the 'Green' and 'Tissue' pixels for this pairwise comparison.
hostsox <- msidata.nociliatededge[
  ,
  msidata.nociliatededge$Class %in% c("Green", "Tissue")
]
table(hostsox$Class)
##
## Green Tissue
## 788 14914
# Build a class-balanced subset: 700 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(hostsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(hostsox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
hostsox.subset <- hostsox[, rownames(pData(hostsox)) %in% coordsSubset]
table(hostsox.subset$Class)
##
## Green Tissue
## 700 700
## Assign numbers for CV groups
pixelNo <- nrow(pData(hostsox.subset))
# For now, just split the data 10 ways; increase to see how this changes in an
# overnight run. Each pixel gets a random fold label in 1..10.
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(hostsox.subset)$cvgroup <- as.vector(randNum)

# Cross-validate OPLS for 1 to 12 components, with `cvgroup` as the fold
# variable.
hostsox.subset.cv <- cvApply(
  hostsox.subset,
  .y = as.factor(hostsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(hostsox.subset.cv))

# Refit on the whole balanced subset with a single component.
comp <- 1
hostsox.opls <- OPLS(
  x = hostsox.subset,
  y = as.factor(hostsox.subset$Class),
  ncomp = comp
)
summary(hostsox.opls)
## $`ncomp = 1`
## Green Tissue
## Accuracy 0.6392857 0.6392857
## Sensitivity 0.6671429 0.6114286
## Specificity 0.6114286 0.6671429
## FDR 0.3680650 0.3524962
# Default plot of the fitted model, then its 50 top-ranked features.
plot(hostsox.opls)
topLabels(hostsox.opls, n = 50)
## mz ncomp column coefficients loadings Oloadings
## 1 496.3401 1 Tissue 2.190190e-05 -0.834286741 -0.0027699508
## 2 518.3224 1 Tissue 1.413832e-05 -0.661216839 0.2861478194
## 3 516.3069 1 Green 1.195781e-05 0.133363640 0.4655180309
## 4 567.4141 1 Green 1.117102e-05 0.131465451 0.1832301685
## 5 532.2799 1 Green 1.060103e-05 0.161870826 0.2364735798
## 6 823.6794 1 Green 9.136929e-06 0.150889605 0.0845210769
## 7 665.4992 1 Tissue 8.296807e-06 -0.100113648 0.0080616156
## 8 839.6536 1 Green 8.217631e-06 0.124286106 0.0375487631
## 9 621.4612 1 Green 7.498719e-06 0.067450013 0.0481353912
## 10 545.0717 1 Tissue 7.375550e-06 0.001654365 -0.2262826831
## 11 681.0893 1 Tissue 7.329858e-06 0.046608625 -0.4256122007
## 12 497.3437 1 Tissue 6.446534e-06 -0.156581441 -0.0236131110
## 13 879.7426 1 Green 6.406377e-06 0.096615371 0.0533194065
## 14 570.3541 1 Green 6.265407e-06 0.094725062 0.1389407121
## 15 480.3454 1 Tissue 6.202877e-06 -0.093589049 -0.0290737032
## 16 806.5690 1 Tissue 6.167139e-06 -0.071750695 0.0404688060
## 17 532.2820 1 Green 6.018555e-06 0.073693905 0.0197033783
## 18 546.4886 1 Green 5.981118e-06 0.096477860 -0.0241823383
## 19 732.5543 1 Tissue 5.903190e-06 -0.276665868 0.2172158159
## 20 562.4833 1 Green 5.794104e-06 0.084942187 0.0005772117
## 21 569.4286 1 Tissue 5.565716e-06 -0.124675719 0.0805338809
## 22 817.1032 1 Tissue 5.534461e-06 0.046342534 -0.3170511974
## 23 532.3012 1 Green 5.401866e-06 0.044973317 0.0094036217
## 24 892.5704 1 Tissue 5.336413e-06 -0.053820090 -0.0189980687
## 25 681.4735 1 Tissue 5.210564e-06 -0.053369197 -0.0000788427
## 26 494.3251 1 Green 5.168811e-06 -0.050424746 0.0736637457
## 27 869.5409 1 Green 5.068846e-06 0.073550020 0.0248852888
## 28 778.5377 1 Tissue 4.946686e-06 -0.079799000 0.0196178338
## 29 550.3869 1 Tissue 4.447521e-06 -0.124283059 0.0313791889
## 30 585.0641 1 Tissue 4.361758e-06 0.040282256 -0.2519056637
## 31 895.7178 1 Green 4.353105e-06 0.049059295 0.0116479019
## 32 840.6551 1 Green 4.214831e-06 0.038032524 0.0052882461
## 33 518.4945 1 Tissue 3.800185e-06 -0.116155412 0.0089342539
## 34 556.2809 1 Green 3.743592e-06 0.065553143 0.0164440783
## 35 548.2752 1 Green 3.708580e-06 0.023642276 0.0018753713
## 36 533.2836 1 Green 3.688313e-06 0.052553591 0.0253268254
## 37 519.3248 1 Tissue 3.668631e-06 -0.062447281 0.0190011603
## 38 519.4972 1 Tissue 3.577097e-06 -0.043627529 -0.0063154879
## 39 510.3197 1 Green 3.416050e-06 0.010195542 -0.0106584210
## 40 643.5176 1 Tissue 3.330996e-06 -0.042901774 -0.0006004879
## 41 770.5106 1 Green 3.310994e-06 0.010928913 0.1695310979
## 42 534.2957 1 Tissue 3.310172e-06 -0.249623317 0.1266676168
## 43 794.5099 1 Tissue 3.257877e-06 -0.040255974 0.0064384661
## 44 681.0866 1 Tissue 3.209880e-06 -0.012533535 -0.0242881327
## 45 682.0926 1 Tissue 3.180273e-06 0.015624207 -0.1577486628
## 46 540.3070 1 Green 3.140796e-06 0.075542004 0.0394575209
## 47 569.4309 1 Tissue 3.127384e-06 -0.040334670 0.0464190149
## 48 784.5870 1 Tissue 3.106047e-06 -0.070455872 0.0202820852
## 49 880.7461 1 Green 2.996841e-06 0.052540406 0.0175246768
## 50 895.7142 1 Green 2.992181e-06 0.052238775 0.0297534064
## weights Oweights
## 1 -0.40920362 -0.252376606
## 2 -0.26415296 0.010747960
## 3 0.22341336 0.301694100
## 4 0.20871342 0.036591817
## 5 0.19806400 0.131892395
## 6 0.17070957 0.004980187
## 7 -0.15501319 0.114667114
## 8 0.15353390 -0.044630070
## 9 0.14010212 -0.069673691
## 10 -0.13780089 -0.047062501
## 11 -0.13694722 -0.205612922
## 12 -0.12044365 -0.014187122
## 13 0.11969338 -0.011004141
## 14 0.11705957 0.076252138
## 15 -0.11589129 0.033166823
## 16 -0.11522358 0.122193212
## 17 0.11244752 -0.056631298
## 18 0.11174807 -0.078389398
## 19 -0.11029210 0.101680736
## 20 0.10825400 -0.059871378
## 21 -0.10398692 0.098464405
## 22 -0.10340297 -0.140551409
## 23 0.10092563 -0.078831219
## 24 -0.09970275 0.059418261
## 25 -0.09735143 0.075725785
## 26 0.09657135 -0.097830454
## 27 0.09470365 -0.028704672
## 28 -0.09242129 0.064443339
## 29 -0.08309515 0.022737908
## 30 -0.08149279 -0.109302424
## 31 0.08133112 -0.047515459
## 32 0.07874770 -0.060816715
## 33 -0.07100065 -0.007729752
## 34 0.06994331 -0.012670134
## 35 0.06928916 -0.065439954
## 36 0.06891051 -0.014566212
## 37 -0.06854277 0.049202984
## 38 -0.06683260 0.039213922
## 39 0.06382368 -0.083453718
## 40 -0.06223458 0.039676410
## 41 0.06186088 0.099949840
## 42 -0.06184551 -0.026140874
## 43 -0.06086846 0.047418753
## 44 -0.05997171 0.041362370
## 45 -0.05941854 -0.066579390
## 46 0.05868098 0.034170545
## 47 -0.05843040 0.084182378
## 48 -0.05803175 0.029470245
## 49 0.05599140 -0.005722886
## 50 0.05590434 0.006337099
# Image the fitted model for the component count stored in `comp`.
image(hostsox.opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 1" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- hostsox.opls@resultData[["ncomp = 1"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)
# Score plot left disabled:
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')
# NOTE(review): with one component both score matrices presumably have a
# single column, in which case data.frame() may not create a `t0.C1` column --
# check names(opls.scores) before concluding that no orthogonal scores exist.
Not a predictive model with OPLS (no t-orthogonal scores calculated)
# Keep only the 'Green' and 'Red' pixels for this pairwise comparison.
moxsox <- msidata.nociliatededge[
  ,
  msidata.nociliatededge$Class %in% c("Green", "Red")
]
table(moxsox$Class)
##
## Green Red
## 788 3022
# Build a class-balanced subset: 700 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(moxsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(moxsox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
moxsox.subset <- moxsox[, rownames(pData(moxsox)) %in% coordsSubset]
table(moxsox.subset$Class)
##
## Green Red
## 700 700
## Assign numbers for CV groups
pixelNo <- nrow(pData(moxsox.subset))
# For now, just split the data 10 ways; increase to see how this changes in an
# overnight run. Each pixel gets a random fold label in 1..10.
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(moxsox.subset)$cvgroup <- as.vector(randNum)

# Cross-validate OPLS for 1 to 12 components, with `cvgroup` as the fold
# variable.
moxsox.subset.cv <- cvApply(
  moxsox.subset,
  .y = as.factor(moxsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(moxsox.subset.cv))

# Refit on the whole balanced subset with a single component.
comp <- 1
moxsox.opls <- OPLS(
  x = moxsox.subset,
  y = as.factor(moxsox.subset$Class),
  ncomp = comp
)
summary(moxsox.opls)
## $`ncomp = 1`
## Green Red
## Accuracy 0.6107143 0.6107143
## Sensitivity 0.6428571 0.5785714
## Specificity 0.5785714 0.6428571
## FDR 0.3959732 0.3816794
# Default plot of the fitted model, then its 50 top-ranked features.
plot(moxsox.opls)
topLabels(moxsox.opls, n = 50)
## mz ncomp column coefficients loadings Oloadings
## 1 577.2627 1 Red 1.898445e-05 -0.9348025781 0.0087454935
## 2 518.3224 1 Red 1.570364e-05 -0.8680461621 0.2767757357
## 3 567.4141 1 Green 1.302543e-05 0.1113652052 0.1058539192
## 4 494.3251 1 Green 1.176194e-05 0.2943051807 0.3665062533
## 5 665.4992 1 Red 1.032269e-05 -0.0852780093 0.0202287295
## 6 681.0893 1 Red 9.539022e-06 -0.0311173249 -0.2942594605
## 7 823.6794 1 Green 9.070144e-06 0.1846168667 0.0486618857
## 8 817.1032 1 Red 8.440879e-06 -0.0074447928 -0.2261395811
## 9 546.4886 1 Green 7.562831e-06 0.1314430627 0.1304949349
## 10 755.5405 1 Red 7.083512e-06 -0.1275691036 0.0397412950
## 11 547.4731 1 Green 6.763211e-06 0.1079966352 0.0063744041
## 12 839.6536 1 Green 6.474881e-06 0.1327343627 0.0103452124
## 13 895.7178 1 Green 6.449360e-06 0.0437062793 -0.0001638785
## 14 569.4309 1 Red 6.425417e-06 -0.1449360817 0.0396003605
## 15 578.2657 1 Red 5.965041e-06 -0.2064077486 -0.0107436804
## 16 534.2957 1 Red 5.574664e-06 -0.3080696727 0.1089671557
## 17 859.6913 1 Green 5.556404e-06 0.0003340791 0.0166859299
## 18 562.4833 1 Green 5.335441e-06 0.1056025894 0.0337771755
## 19 857.0956 1 Red 5.012481e-06 0.0058696119 -0.1436634382
## 20 879.7391 1 Green 4.968246e-06 0.0181590005 0.0103639911
## 21 516.3069 1 Green 4.957690e-06 -0.0986161773 0.3093944335
## 22 733.5571 1 Green 4.729283e-06 0.0461328116 0.0862855592
## 23 585.0641 1 Red 4.684075e-06 -0.0253110804 -0.2425040386
## 24 572.3699 1 Red 4.602843e-06 -0.0627905740 0.0391476694
## 25 532.3012 1 Green 4.573289e-06 0.0018964634 0.0036623242
## 26 545.0717 1 Red 4.532697e-06 0.0149423858 -0.1408059435
## 27 732.5543 1 Red 4.444182e-06 -0.1172554073 0.2960086956
## 28 558.4859 1 Red 4.254870e-06 -0.0265024905 0.0261134963
## 29 824.6816 1 Green 3.951616e-06 0.0683280435 0.0089871383
## 30 880.7461 1 Green 3.919137e-06 0.0680624844 0.0031416892
## 31 518.3245 1 Green 3.877335e-06 0.2166695330 0.0113981441
## 32 540.3070 1 Red 3.842269e-06 -0.0088936740 0.0185064785
## 33 786.6012 1 Green 3.758817e-06 0.0378856603 0.1415546889
## 34 681.4735 1 Red 3.656944e-06 -0.0255833764 0.0042586558
## 35 496.3401 1 Green 3.654763e-06 -0.1561668761 0.3429318529
## 36 643.5176 1 Red 3.632472e-06 -0.0262226636 0.0135348796
## 37 494.3231 1 Green 3.620405e-06 -0.0035808530 -0.0030723304
## 38 730.5381 1 Green 3.587749e-06 0.0518890800 0.0342315039
## 39 532.2820 1 Green 3.536513e-06 0.0901480476 0.0416899552
## 40 682.0926 1 Red 3.530699e-06 0.0008352384 -0.1078180541
## 41 782.5685 1 Red 3.465576e-06 -0.0378687851 0.0248867929
## 42 519.4972 1 Red 3.455637e-06 0.0000842909 0.0344742169
## 43 825.6950 1 Red 3.453216e-06 0.0168740383 0.0112644377
## 44 840.6551 1 Green 3.423464e-06 0.0502866604 -0.0015783416
## 45 517.3095 1 Red 3.371007e-06 -0.0513020130 0.0285089236
## 46 569.4286 1 Green 3.283459e-06 0.0025584284 0.0472000315
## 47 860.6926 1 Green 3.279644e-06 0.0042286963 0.0037809022
## 48 413.2668 1 Red 3.275262e-06 -0.0127646070 0.0042649401
## 49 808.5815 1 Red 3.177078e-06 -0.0321089072 0.0281567874
## 50 621.4612 1 Green 3.155697e-06 0.0487938393 0.0188274489
## weights Oweights
## 1 -0.35743629 -0.137122651
## 2 -0.29566558 0.114857673
## 3 0.24524081 -0.024751695
## 4 0.22145197 0.332895841
## 5 -0.19435400 0.125030726
## 6 -0.17959920 -0.176658195
## 7 0.17077125 0.004317331
## 8 -0.15892355 -0.113332436
## 9 0.14239180 0.083729505
## 10 -0.13336725 0.081603049
## 11 0.12733667 -0.039602658
## 12 0.12190804 -0.020900751
## 13 0.12142755 -0.069808327
## 14 -0.12097674 0.064855497
## 15 -0.11230886 -0.018573372
## 16 -0.10495890 0.051522246
## 17 0.10461509 -0.059559595
## 18 0.10045485 0.006387471
## 19 -0.09437420 -0.072195760
## 20 0.09354135 -0.050037109
## 21 0.09334262 0.198312219
## 22 0.08904221 0.041345339
## 23 -0.08819104 -0.189122719
## 24 -0.08666160 0.075099280
## 25 0.08610517 -0.058387051
## 26 -0.08534091 -0.071985552
## 27 -0.08367436 0.306073252
## 28 -0.08011002 0.073075423
## 29 0.07440040 -0.015601053
## 30 0.07378890 -0.021115563
## 31 0.07300186 0.052396601
## 32 -0.07234164 0.067460071
## 33 0.07077043 0.106367134
## 34 -0.06885237 0.043400179
## 35 0.06881131 0.224713770
## 36 -0.06839162 0.052061708
## 37 0.06816441 -0.054405191
## 38 0.06754957 0.007490820
## 39 0.06658491 0.032305834
## 40 -0.06647544 -0.058913521
## 41 -0.06524931 0.056050125
## 42 -0.06506219 0.082019935
## 43 -0.06501661 0.066084585
## 44 0.06445644 -0.026757847
## 45 -0.06346879 0.052525304
## 46 0.06182045 0.003171703
## 47 0.06174862 -0.039467991
## 48 -0.06166611 0.043738342
## 49 -0.05981752 0.057860749
## 50 0.05941496 -0.003320468
# Image the fitted model for the component count stored in `comp`.
image(moxsox.opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 1" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- moxsox.opls@resultData[["ncomp = 1"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)
# Score plot left disabled:
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')
# NOTE(review): with one component both score matrices presumably have a
# single column, in which case data.frame() may not create a `t0.C1` column --
# check names(opls.scores) before concluding that no orthogonal scores exist.
Also not a predictive model – no t-orthogonal scores
One other possibility that would be good to consider is that the spatial variation may alter the structure of the dataset
Image the complete maldifishmz dataset
# Image `skma` with the key shown, on a single-panel (1 x 1) layout.
image(skma, key = TRUE, layout = c(1, 1))
Take region between x=100 and 200
# Keep pixels whose x coordinate lies strictly between 100 and 200, then subset
# the image object to those pixel names.
p <- pData(maldifishmz)[
  (pData(maldifishmz)$x > 100 & pData(maldifishmz)$x < 200),
]
msi <- maldifishmz[, rownames(pData(maldifishmz)) %in% rownames(p)]
## Assign cross validation groups by cutting the y-coordinate range into 10 equal-width bins
msi$CV <- cut(msi$y, breaks = 10, labels = FALSE)
## Visualize two of the cuts
msi1 <- msi[, msi$CV %in% c(1, 2)]
pca.msi1 <- PCA(msi1, ncomp = 1:2)
image(pca.msi1, superpose = TRUE)
# Keep only the 'Red' and 'Tissue' pixels of the x-restricted region `msi`.
hostmox <- msi[, msi$Class %in% c("Red", "Tissue")]
table(hostmox$Class)
##
## Red Tissue
## 1639 6728
# Build a class-balanced subset: 400 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(hostmox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(hostmox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
hostmox.subset <- hostmox[, rownames(pData(hostmox)) %in% coordsSubset]
table(hostmox.subset$Class)
##
## Red Tissue
## 400 400
# Cross-validate OPLS for 1 to 12 components, with the y-coordinate bins in
# `CV` as the fold variable.
hostmox.cv <- cvApply(
  hostmox.subset,
  .y = as.factor(hostmox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(hostmox.cv))

# Refit on the whole balanced subset with nine components.
comp <- 9
opls <- OPLS(x = hostmox.subset, y = as.factor(hostmox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 9`
## Red Tissue
## Accuracy 0.6450000 0.6450000
## Sensitivity 0.5775000 0.7125000
## Specificity 0.7125000 0.5775000
## FDR 0.3323699 0.3722467
# Image the fitted model for the component count stored in `comp`.
image(opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 9" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- opls@resultData[["ncomp = 9"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)

# Scatter of column C1 against column t0.C1, coloured by class
# (presumably predictive score vs. first orthogonal score -- confirm with
# names(opls.scores)).
ggplot(opls.scores, aes(x = C1, y = t0.C1, color = Class)) +
  geom_point() +
  xlab("t[1]") +
  ylab("t[0]")
But the FDR is too high; it is not clear how to overcome this, despite being able to build a model
# Keep only the 'Green' and 'Tissue' pixels of the x-restricted region `msi`.
hostsox <- msi[, msi$Class %in% c("Green", "Tissue")]
table(hostsox$Class)
##
## Green Tissue
## 410 6728
# Build a class-balanced subset: 400 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(hostsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(hostsox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
hostsox.subset <- hostsox[, rownames(pData(hostsox)) %in% coordsSubset]
table(hostsox.subset$Class)
##
## Green Tissue
## 400 400
# Cross-validate OPLS for 1 to 12 components, with the y-coordinate bins in
# `CV` as the fold variable.
hostsox.cv <- cvApply(
  hostsox.subset,
  .y = as.factor(hostsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(hostsox.cv))

# Refit on the whole balanced subset with a single component.
comp <- 1
opls <- OPLS(x = hostsox.subset, y = as.factor(hostsox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 1`
## Green Tissue
## Accuracy 0.6637500 0.6637500
## Sensitivity 0.6550000 0.6725000
## Specificity 0.6725000 0.6550000
## FDR 0.3333333 0.3390663
# Image the fitted model for the component count stored in `comp`.
image(opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 1" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- opls@resultData[["ncomp = 1"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)
# Score plot left disabled:
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')
# NOTE(review): with one component both score matrices presumably have a
# single column, in which case data.frame() may not create a `t0.C1` column --
# check names(opls.scores) before re-enabling this plot.
# Keep only the 'Green' and 'Red' pixels of the x-restricted region `msi`.
moxsox <- msi[, msi$Class %in% c("Green", "Red")]
table(moxsox$Class)
##
## Green Red
## 410 1639
# Build a class-balanced subset: 400 pixels per class, drawn without
# replacement. Sampled rows are matched back to the full pixel table through
# an "x-y" coordinate key, and the matching pixel names select the columns.
# NOTE(review): the draw is random, so results vary between runs unless a seed
# is set elsewhere in the script.
ddd <- data.frame(pData(moxsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")
subset <- data.frame(
  pData(moxsox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)
moxsox.subset <- moxsox[, rownames(pData(moxsox)) %in% coordsSubset]
table(moxsox.subset$Class)
##
## Green Red
## 400 400
# Cross-validate OPLS for 1 to 12 components, with the y-coordinate bins in
# `CV` as the fold variable.
moxsox.cv <- cvApply(
  moxsox.subset,
  .y = as.factor(moxsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(moxsox.cv))

# Refit on the whole balanced subset with a single component.
comp <- 1
opls <- OPLS(x = moxsox.subset, y = as.factor(moxsox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 1`
## Green Red
## Accuracy 0.6525 0.6525
## Sensitivity 0.6525 0.6525
## Specificity 0.6525 0.6525
## FDR 0.3475 0.3475
# Image the fitted model for the component count stored in `comp`.
image(opls, model = list(ncomp = comp))

# Take the result list stored under "ncomp = 1" and gather its Oscores,
# scores and classes elements into one data frame.
scores <- opls@resultData[["ncomp = 1"]]
opls.scores <- data.frame(
  t0 = scores$Oscores,
  t1 = scores$scores,
  Class = scores$classes
)
# Score plot left disabled:
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')
# NOTE(review): with one component both score matrices presumably have a
# single column, in which case data.frame() may not create a `t0.C1` column --
# check names(opls.scores) before re-enabling this plot.

# Ion image of the full dataset at m/z 869.537 with plusminus = 0.5.
image(maldifishmz, mz = 869.537, plusminus = 0.5)